* Start from an empty data set and close any log left open by an
* earlier (possibly aborted) run; -capture- ignores the error raised
* when no log is open.
clear
cap log close

* Log everything below in SMCL format, overwriting any previous log.
log using "log-files/1_Data_part1_initialsetup.smcl", replace smcl

* Stata timers accumulate across on/off cycles within a session, so
* timer 2 is reset first. Without this, re-running the do-file in the
* same session reports the cumulative time of all runs.
timer clear 2
timer on 2

*----------------------------------------------------------*
* 1. Computo data: import, restrict, and store as tempfile *
*----------------------------------------------------------*
	* Read the Computo workbook; the first row holds the variable names
	import excel using "data/inputs/1_Bolivia_data_computo.xlsx", ///
		sheet("2.RecepcionSobres") firstrow clear

	* Only the presidential race is used
	drop if ElecActa != "Presidente y Vicepresidente"

	* Rename the time stamp first, then keep the vote counts, the
	* geographic identifiers (Pais through Reci) and the mesa identifiers
	rename ComputoDate computo_date
	keep computo_date CC FPV MTS UCS MAS F PDC MNR Blancos Nulos PANBOL  ///
		Pais - Reci NumMesa CodVer Inscritos

	* Recode system-missing values to zero for every variable stored
	* between CC and Nulos (the range follows the column order of the
	* data set, not the order of the keep list above)
	foreach votes of varlist CC - Nulos {
		replace `votes' = 0 if `votes' == .
	}

	* Store the result for the merges in sections 3a and 3b
	tempfile computo_data
	compress
	save `computo_data'

*---------------------------------------------------------------------*
* 2a. TREP transmission time stamps, using EstadoActa == "Registrada" *
*---------------------------------------------------------------------*
	import excel using "data/inputs/1_Bolivia_data_trep.xlsx", ///
		sheet("Data") firstrow clear

	* Presidential race, registered actas only
	keep if ElecActa == "Presidente y Vicepresidente"
	keep if EstadoActa == "Registrada"

	* Where the transmission time stamp is missing but the log
	* registration time stamp is not, use the latter (the original
	* authors note these are fifteen actas transmitted from outside
	* the app)
	replace UltTransmisionDate = FechaRegistroenLog if ///
		FechaRegistroenLog != . & UltTransmisionDate == .

	* Mesas can appear more than once: keep, for each mesa, the row that
	* sorts last on the transmission time stamp.
	* NOTE(review): Stata sorts missing values last, so a duplicate with
	* a missing time stamp would be the one kept - confirm this is intended.
	bysort NumMesa (UltTransmisionDate): keep if _n == _N

	* Following the OAS, the Computo vote totals are used even with the
	* TREP time stamps, so the TREP vote totals are dropped here
	keep Pais - EstadoActa UltTransmisionDate
	rename UltTransmisionDate trep_date_ulttrans

	tempfile trep_fechas_ulttrans
	compress
	save `trep_fechas_ulttrans'

*---------------------------------------------------------------------*
* 2b. TREP verification time stamps, using EstadoActa == "Verificada" *
*---------------------------------------------------------------------*
	import excel using "data/inputs/1_Bolivia_data_trep.xlsx", ///
		sheet("Data") firstrow clear

	* Presidential race, verified actas only
	keep if ElecActa == "Presidente y Vicepresidente"
	keep if EstadoActa == "Verificada"

	* Mesas can appear more than once: keep, for each mesa, the row that
	* sorts last on the verification time stamp.
	* NOTE(review): Stata sorts missing values last, so a duplicate with
	* a missing time stamp would be the one kept - confirm this is intended.
	bysort NumMesa (VerificadorDate): keep if _n == _N

	* Following the OAS, the Computo vote totals are used even with the
	* TREP time stamps, so the TREP vote totals are dropped here
	keep Pais - EstadoActa VerificadorDate
	rename VerificadorDate trep_date_verificador

	tempfile trep_fechas_verificadas
	compress
	save `trep_fechas_verificadas'

*----------------------------------------------------------------------------*
* 3a. Merging TREP transmission time stamps (from 2a) to Computo vote totals *
*----------------------------------------------------------------------------*
	* Start from the Computo data and check the expected number of mesas
	use `computo_data', clear
	assert _N == 34555

	* Bring in the transmission time stamp only; no TREP mesa may be
	* absent from the Computo data (no using-only rows)
	merge 1:1 NumMesa using `trep_fechas_ulttrans', ///
		keepusing(trep_date*)
	assert _merge != 2
	drop _merge

	* Dummy flagging observations without a transmission time stamp,
	* with a check on the expected number of such observations
	gen trep_missing_date_ulttrans = (trep_date_ulttrans == .)
	qui count if trep_missing_date_ulttrans == 1
	assert r(N) == 377

	tempfile computo_data_3a
	compress
	save `computo_data_3a'

*----------------------------------------------------------------------------*
* 3b. Merging TREP verification time stamps (from 2b) to Computo vote totals *
*----------------------------------------------------------------------------*
	* Start from the Computo data and check the expected number of mesas
	use `computo_data', clear
	assert _N == 34555

	* Bring in the verification time stamp only; no TREP mesa may be
	* absent from the Computo data (no using-only rows)
	merge 1:1 NumMesa using `trep_fechas_verificadas', ///
		keepusing(trep_date*)
	assert _merge != 2
	drop _merge

	* Dummy flagging observations without a verification time stamp.
	* The check is run quietly (as in 3a) and uses -count-, whose r(N)
	* is always defined, instead of macro-expanding r(sum).
	gen trep_missing_date_verificador = (trep_date_verificador == .)
	qui count if trep_missing_date_verificador == 1
	assert r(N) == 1511

	compress
	tempfile computo_data_3b
	save `computo_data_3b'

*---------------------------------------------------------------------------*
* 4. Generating three data sets with Cómputo vote totals and
*    TREP *verification* time stamps:
*    a) DROPPING the 1,511 observations that were excluded from part of
*       the OAS analysis because they do not have
*       EstadoActa == "Verificada". Within identical time stamps,
*       observations are sorted randomly.
*    b) KEEPING those 1,511 observations and assigning them the maximum
*       verification time stamp (following the OAS note that they are
*       "late reporters"). Within identical time stamps, observations
*       are sorted randomly.
*    c) The same as 4a, but using the same exact sort order as the OAS
*       (per the replication data), rather than sorting randomly within
*       identical time stamps.
*---------------------------------------------------------------------------*
* 5. Generating two data sets with Cómputo vote totals and
*    TREP *transmission* time stamps:
*    a) DROPPING the 377 observations with missing transmission time stamps
*    b) KEEPING those observations, and replacing the missing time stamps
*       with the median transmission time in the municipality
*---------------------------------------------------------------------------*
* Each pass of the loop loads the merged data from 3a or 3b, sets the
* local date_var (time stamp that orders the mesas) and the local file
* (output name), computes vote shares and the cumulative share of votes
* counted, and saves the result under data/outputs.
*
* NOTE(review): the random tie-breaker uses runiform(), but no -set seed-
* appears in this file. Confirm that the calling do-file sets one;
* otherwise the orderings in 4a, 4b, 5a and 5b differ from run to run.
*---------------------------------------------------------------------------*
	foreach spec in 4a 4b 4c 5a 5b {
		* 4a. Verification date, dropping missings
		if "`spec'"=="4a" {
			use `computo_data_3b', clear
			drop if trep_missing_date_verificador == 1
			local date_var = "trep_date_verificador"
			local file = "computo_verificadortrepdate_nomissings"
		}

		* 4b. Verification date, missings appended at the end
		else if "`spec'"=="4b" {
			use `computo_data_3b', clear
			sum trep_date_verificador
			local max = r(max)
			replace trep_date_verificador = `max' if trep_date_verificador == .
			local date_var = "trep_date_verificador"
			local file = "computo_verificadortrepdate_missingsatthend"
		}

		* 4c. Verification date, dropping missings, OAS order
		else if "`spec'"=="4c" {
			use `computo_data_3b', clear
			drop if trep_missing_date_verificador == 1

			* Get OAS sort order. The built-in -rename- is used to add
			* the _OAS suffix, so the user-written -renvars- command
			* does not have to be installed.
			preserve
				use "data/inputs/1_Bolivia_OAS_Nooruddin_replication_dataset.dta", clear
				keep cum_ps_natl_share num_mesa_trep verificador_date
				rename num_mesa_trep NumMesa
				rename cum_ps_natl_share cum_ps_natl_share_OAS
				rename verificador_date verificador_date_OAS
				tempfile OAS_Nooruddin_order
				save `OAS_Nooruddin_order'
			restore

			* Keep every mesa of the current data; mesas found only in
			* the OAS file are discarded
			merge 1:1 NumMesa using `OAS_Nooruddin_order'
			drop if _merge == 2
			drop _merge

			* Sort order within identical time stamps
			rename cum_ps_natl_share_OAS tie_order

			local date_var = "verificador_date_OAS"
			local file = "computo_verificadortrepdate_nomissings_oasorder"
		}

		* 5a. Transmission date, dropping missings
		else if "`spec'"=="5a" {
			use `computo_data_3a', clear
			drop if trep_missing_date_ulttrans == 1
			local date_var = "trep_date_ulttrans"
			local file = "computo_ulttranstrepdate_nomissings"
		}

		* 5b. Transmission date, missings at the municipio median
		else if "`spec'"=="5b" {
			use `computo_data_3a', clear
			* The median is stored as double: the egen default (float)
			* cannot hold millisecond date-time values exactly.
			* NOTE(review): assumes the time stamps are %tc values as
			* produced by import excel - confirm.
			bys Pais Dep Prov Muni: egen double median_date = median(trep_date_ulttrans)
			* Full variable name, so this also runs with varabbrev off
			replace trep_date_ulttrans = median_date if trep_missing_date_ulttrans == 1
			drop median_date
			local date_var = "trep_date_ulttrans"
			local file = "computo_ulttranstrepdate_missingsatmunmedian"
		}

		* Within ties, sort randomly for all data sets except 4c.
		* The draw is stored as double: rounded to float, collisions
		* between draws are likely with this many observations.
		if "`spec'"!="4c" {
			gen double tie_order = runiform()
		}

		* Measures that DO USE Blancos and Nulos (sorted)
		gen total_so = CC + FPV + MTS + UCS + MAS + F + PDC + MNR + PANBOL + ///
			Blancos + Nulos
		*---*
		gen mshare_so = MAS / total_so
		gen cshare_so = CC / total_so
		* NumMesa (unique per observation) is the last sort key, so any
		* remaining ties are ordered the same way in both sorts below
		sort `date_var' tie_order NumMesa
		gen cumsumtotal_so = sum(total_so)
		*---*
		egen maxcumsumtotal_so = max(cumsumtotal_so)
		gen pcs_so = cumsumtotal_so / maxcumsumtotal_so /* Percentile of vote counted */
		drop cumsumtotal_so maxcumsumtotal_so

		* Measures that DO NOT USE Blancos and Nulos (sorted)
		gen total_nbnn = CC + FPV + MTS + UCS + MAS + F + PDC + MNR + PANBOL
		*---*
		gen mshare_nbnn_so = MAS / total_nbnn
		gen cshare_nbnn_so = CC / total_nbnn
		* Re-sort before the running sum: egen (used above) may change
		* the sort order of the data
		sort `date_var' tie_order NumMesa
		drop tie_order
		gen cumsumtotal_nbnn = sum(total_nbnn)
		*---*
		egen maxcumsumtotal_nbnn = max(cumsumtotal_nbnn)
		gen pcs_nbnn_so = cumsumtotal_nbnn / maxcumsumtotal_nbnn
		drop cumsumtotal_nbnn maxcumsumtotal_nbnn

		* There is one pair of recintos (precincts) with identical names
		* in the same locality/municipality/province/department.
		* Here we change the name so as to distinguish them
		replace Reci = "Unidad Educativa Privada La Paz 1" if ///
			Reci == "Unidad Educativa Privada La Paz" ///
			& Muni == "Nuestra Señora de La Paz" ///
			& Prov == "Murillo" ///
			& Dep == "La Paz" ///
			& NumMesa>=22383 & NumMesa<=22386
		replace Reci = "Unidad Educativa Privada La Paz 2" if ///
			Reci == "Unidad Educativa Privada La Paz" ///
			& Muni == "Nuestra Señora de La Paz" ///
			& Prov == "Murillo" ///
			& Dep == "La Paz" ///
			& NumMesa>22386

		* Save. NumMesa is added as the last sort key so that the row
		* order within a recinto is the same on every run.
		compress
		sort Pais Dep Prov Muni Loc Reci NumMesa
		saveold "data/outputs//`file'.dta", ///
			replace
	}


*----------------------------------------------------------------*
* 6. Adding covariates to the two transmission-date data sets    *
*----------------------------------------------------------------*
	* Run the covariates do-file and store whatever data set it leaves
	* in memory as a tempfile
	do "codes/1_Data_part2_covariates.do"
	tempfile covariates_full
	save `covariates_full'

	* Merge the covariates into each output file by mesa, discarding
	* mesas that appear only in the covariates data, and overwrite the file
	local targets computo_ulttranstrepdate_missingsatmunmedian ///
		computo_ulttranstrepdate_nomissings
	foreach dta of local targets {
		use "data/outputs//`dta'.dta", clear

		merge 1:1 NumMesa using `covariates_full'
		drop if _merge == 2
		drop _merge

		compress
		saveold "data/outputs//`dta'.dta", replace
	}

*------------------------------------------------------------------------*
* 7. Label variables in every output data set                            *
*    NOTE(review): the original header also mentioned sampling the data  *
*    "if specified above", but no sampling option is set anywhere in     *
*    this file - confirm whether the labels do-file still samples.       *
*------------------------------------------------------------------------*
	* The list of the five output files is kept in the global macro
	* datasets; the semicolon delimiter lets the list span several lines
	# delimit ;
		global datasets "computo_ulttranstrepdate_missingsatmunmedian
			computo_ulttranstrepdate_nomissings
			computo_verificadortrepdate_missingsatthend
			computo_verificadortrepdate_nomissings
			computo_verificadortrepdate_nomissings_oasorder";
	# delimit cr

	* For each output file: load it, run the variable-labels do-file
	* quietly (run suppresses its output), and overwrite the file
	foreach x of global datasets {
		use "data/outputs//`x'.dta", clear
		run "codes/auxiliary_programs/3_VariableLabels.do"

		saveold "data/outputs//`x'.dta", replace
	}

*------------------------------------------------------------------------------*
* Wrap-up: stop timer 2 (started at the top of this file) and print its
* elapsed time, then close the log and clear data and other objects
* from memory.
timer off 2
timer list 2

log close
clear all
